{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10000\n",
      "20000\n",
      "30000\n",
      "40000\n",
      "50000\n",
      "60000\n",
      "70000\n",
      "80000\n",
      "90000\n",
      "100000\n",
      "110000\n",
      "120000\n",
      "130000\n",
      "140000\n",
      "150000\n",
      "160000\n",
      "170000\n",
      "180000\n",
      "190000\n",
      "200000\n",
      "210000\n",
      "220000\n",
      "230000\n",
      "240000\n",
      "250000\n",
      "260000\n",
      "270000\n",
      "280000\n",
      "290000\n",
      "300000\n",
      "310000\n",
      "320000\n",
      "330000\n",
      "340000\n",
      "350000\n",
      "360000\n",
      "370000\n",
      "380000\n",
      "390000\n",
      "400000\n",
      "410000\n",
      "420000\n",
      "430000\n",
      "440000\n",
      "450000\n",
      "460000\n",
      "470000\n",
      "480000\n",
      "490000\n",
      "500000\n",
      "510000\n",
      "520000\n",
      "530000\n",
      "540000\n",
      "550000\n",
      "560000\n",
      "570000\n",
      "580000\n",
      "590000\n",
      "600000\n",
      "610000\n",
      "620000\n",
      "630000\n",
      "640000\n",
      "650000\n",
      "660000\n",
      "670000\n",
      "680000\n",
      "690000\n",
      "700000\n",
      "710000\n",
      "720000\n",
      "730000\n",
      "740000\n",
      "750000\n",
      "760000\n",
      "770000\n",
      "780000\n",
      "790000\n",
      "800000\n",
      "810000\n",
      "820000\n",
      "830000\n",
      "840000\n",
      "850000\n",
      "860000\n",
      "870000\n",
      "880000\n",
      "890000\n",
      "900000\n",
      "910000\n",
      "920000\n",
      "930000\n",
      "940000\n",
      "950000\n",
      "960000\n",
      "970000\n",
      "980000\n",
      "990000\n",
      "1000000\n",
      "1010000\n",
      "1020000\n",
      "1030000\n",
      "1040000\n",
      "1050000\n",
      "1060000\n",
      "1070000\n",
      "1080000\n",
      "1090000\n",
      "1100000\n",
      "1110000\n",
      "1120000\n",
      "1130000\n",
      "1140000\n",
      "1150000\n",
      "1160000\n",
      "1170000\n",
      "1180000\n",
      "1190000\n",
      "1200000\n",
      "1210000\n",
      "1220000\n",
      "1230000\n",
      "1240000\n",
      "1250000\n",
      "1260000\n",
      "1270000\n",
      "1280000\n",
      "1290000\n",
      "1300000\n",
      "1310000\n",
      "1320000\n",
      "1330000\n",
      "1340000\n",
      "1350000\n",
      "1360000\n",
      "1370000\n",
      "1380000\n",
      "1390000\n",
      "1400000\n",
      "1410000\n",
      "1420000\n",
      "1430000\n",
      "1440000\n",
      "1450000\n",
      "1460000\n",
      "1470000\n",
      "1480000\n",
      "1490000\n",
      "1500000\n",
      "1510000\n",
      "1520000\n",
      "1530000\n",
      "1540000\n",
      "1550000\n",
      "1560000\n",
      "1570000\n",
      "1580000\n",
      "1590000\n",
      "1600000\n",
      "1610000\n",
      "1620000\n",
      "1630000\n",
      "1640000\n",
      "1650000\n",
      "1660000\n",
      "1670000\n",
      "1680000\n",
      "1690000\n",
      "1700000\n",
      "1710000\n",
      "1720000\n",
      "1730000\n",
      "1740000\n",
      "1750000\n",
      "1760000\n",
      "1770000\n",
      "1780000\n",
      "1790000\n",
      "1800000\n",
      "1810000\n",
      "1820000\n",
      "1830000\n",
      "1840000\n",
      "1850000\n",
      "1860000\n",
      "1870000\n",
      "1880000\n",
      "1890000\n",
      "1900000\n",
      "1910000\n",
      "1920000\n",
      "1930000\n",
      "1940000\n",
      "1950000\n",
      "1960000\n",
      "1970000\n",
      "1980000\n",
      "1990000\n",
      "2000000\n",
      "2010000\n",
      "2020000\n",
      "2030000\n",
      "2040000\n",
      "2050000\n",
      "2060000\n",
      "2070000\n",
      "2080000\n",
      "2090000\n",
      "2100000\n",
      "2110000\n",
      "2120000\n",
      "2130000\n",
      "2140000\n",
      "2150000\n",
      "2160000\n",
      "2170000\n",
      "2180000\n",
      "2190000\n",
      "2200000\n",
      "2210000\n",
      "2220000\n",
      "2230000\n",
      "2240000\n",
      "2250000\n",
      "2260000\n",
      "2270000\n",
      "2280000\n",
      "2290000\n",
      "2300000\n",
      "2310000\n",
      "2320000\n",
      "2330000\n",
      "2340000\n",
      "2350000\n",
      "2360000\n",
      "2370000\n",
      "2380000\n",
      "2390000\n",
      "2400000\n",
      "2410000\n",
      "2420000\n",
      "2430000\n",
      "2440000\n",
      "2450000\n",
      "2460000\n",
      "2470000\n",
      "2480000\n",
      "2490000\n",
      "2500000\n",
      "2510000\n",
      "2520000\n",
      "2530000\n",
      "2540000\n",
      "2550000\n",
      "2560000\n",
      "2570000\n",
      "2580000\n",
      "2590000\n",
      "2600000\n",
      "2610000\n",
      "2620000\n",
      "2630000\n",
      "2640000\n",
      "2650000\n",
      "2660000\n",
      "2670000\n",
      "2680000\n",
      "2690000\n",
      "2700000\n",
      "2710000\n",
      "2720000\n",
      "2730000\n",
      "2740000\n",
      "2750000\n",
      "2760000\n",
      "2770000\n",
      "2780000\n",
      "2790000\n",
      "2800000\n",
      "2810000\n",
      "2820000\n",
      "2830000\n",
      "2840000\n",
      "2850000\n",
      "2860000\n",
      "2870000\n",
      "2880000\n",
      "2890000\n",
      "2900000\n",
      "2910000\n",
      "2920000\n",
      "2930000\n",
      "2940000\n",
      "2950000\n",
      "2960000\n",
      "2970000\n",
      "2980000\n",
      "2990000\n",
      "10000\n",
      "20000\n",
      "30000\n",
      "40000\n",
      "50000\n",
      "60000\n",
      "70000\n",
      "80000\n",
      "90000\n",
      "100000\n",
      "110000\n",
      "120000\n",
      "130000\n",
      "140000\n",
      "150000\n",
      "160000\n",
      "170000\n",
      "180000\n",
      "190000\n",
      "200000\n",
      "210000\n",
      "haha 2999967\n",
      "0 117.421642971 226.39243558\n",
      "haha 217360\n",
      "0 117.105364372 223.163609612\n",
      "haha 2999967\n",
      "0 58.2854154729 119.067742006\n",
      "haha 217360\n",
      "0 58.1745629371 117.683934953\n"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "\n",
    "def getQuesInfo(fpath):\n",
    "    \"\"\"Parse a tab-separated question file into five parallel lists.\n",
    "\n",
    "    Every line contributes one entry to each list: field 0 goes to idx and\n",
    "    fields 1-4 to title_char, title_word, desc_char and desc_word.\n",
    "    line.strip() removes trailing tabs, so rows whose last fields are empty\n",
    "    arrive short; missing fields are stored as ''.\n",
    "\n",
    "    The running line count is printed every 10000 lines.\n",
    "\n",
    "    Returns:\n",
    "        (idx, title_char, title_word, desc_char, desc_word), five lists of\n",
    "        strings with one element per input line.\n",
    "    \"\"\"\n",
    "    idx, title_char, title_word, desc_char, desc_word = [], [], [], [], []\n",
    "    # 'with' closes the file even when a line fails to parse.\n",
    "    with open(fpath) as f:\n",
    "        for cnt, line in enumerate(f, 1):\n",
    "            if cnt % 10000 == 0:\n",
    "                print cnt\n",
    "            terms = line.strip().split('\\t')\n",
    "            # Pad/truncate to exactly five fields. Rows with 2 or more than\n",
    "            # 5 fields used to add an id but no text entries, which shifted\n",
    "            # every later row out of alignment with idx.\n",
    "            terms = (terms + [''] * 4)[:5]\n",
    "            idx.append(terms[0])\n",
    "            title_char.append(terms[1])\n",
    "            title_word.append(terms[2])\n",
    "            desc_char.append(terms[3])\n",
    "            desc_word.append(terms[4])\n",
    "    return idx, title_char, title_word, desc_char, desc_word\n",
    "\n",
    "def getLenInfo(arr):\n",
    "    \"\"\"Count the non-empty comma-separated tokens of every string in arr.\n",
    "\n",
    "    Returns a list of ints, one per element of arr, in the same order.\n",
    "    \"\"\"\n",
    "    # Earlier variants (kept for reference): np.log(count + 1) scaling, and\n",
    "    # dropping items whose count is 0.\n",
    "    lengths = []\n",
    "    for item in arr:\n",
    "        tokens = [tok for tok in item.split(',') if tok != '']\n",
    "        lengths.append(len(tokens))\n",
    "    return lengths\n",
    "\n",
    "def plotHist(train_len, eval_len, save_name):\n",
    "    \"\"\"Overlay the train and eval length histograms and save the figure.\n",
    "\n",
    "    Both histograms use 100 bins with normed=1; train is drawn in blue and\n",
    "    eval in red (legend entry 'test'). The figure is titled save_name and\n",
    "    written to './<save_name>.png'.\n",
    "    \"\"\"\n",
    "    out_file = './' + save_name + '.png'\n",
    "    fig = plt.figure()\n",
    "    ax = fig.gca()\n",
    "    ax.set_title(save_name)\n",
    "    ax.set_xlabel(\"Length\")\n",
    "    ax.set_ylabel(\"Count\")\n",
    "    # ax.set_xlim(-10, 60)\n",
    "    # hist() returns (values, bin_edges, patches); the first patch of each\n",
    "    # call serves as its legend handle.\n",
    "    train_patches = ax.hist(train_len, bins=100, normed=1, alpha=.2, color='b')[2]\n",
    "    eval_patches = ax.hist(eval_len, bins=100, normed=1, alpha=.2, color='r')[2]\n",
    "    ax.legend([train_patches[0], eval_patches[0]], ['train', 'test'])\n",
    "    fig.savefig(out_file)\n",
    "\n",
    "def analysis(train_path, eval_path):\n",
    "    \"\"\"Print description-length statistics for the train and eval files.\n",
    "\n",
    "    For desc_char and then desc_word, and for train before eval, prints\n",
    "    'haha' with the number of rows, then the min, mean and standard\n",
    "    deviation of the per-row token counts from getLenInfo.\n",
    "    \"\"\"\n",
    "    # getQuesInfo returns (idx, title_char, title_word, desc_char, desc_word).\n",
    "    train_fields = getQuesInfo(train_path)\n",
    "    eval_fields = getQuesInfo(eval_path)\n",
    "    for col in (3, 4):  # 3 = desc_char, 4 = desc_word\n",
    "        for fields in (train_fields, eval_fields):\n",
    "            lengths = np.array(getLenInfo(fields[col]))\n",
    "            print 'haha', len(lengths)\n",
    "            print np.min(lengths), np.average(lengths), np.std(lengths)\n",
    "    # Histograms, currently disabled (columns 1-4 in turn):\n",
    "    # for col, name in enumerate((\"question_title_char\", \"question_title_word\",\n",
    "    #                             \"question_desc_char\", \"question_desc_word\"), 1):\n",
    "    #     plotHist(getLenInfo(train_fields[col]), getLenInfo(eval_fields[col]), name)\n",
    "\n",
    "train_question_file = \"../ieee_zhihu_cup/question_train_set.txt\"\n",
    "eval_question_file = \"../ieee_zhihu_cup/question_eval_set.txt\"\n",
    "analysis(train_question_file, eval_question_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "\n",
    "def getTopicInfo(fpath):\n",
    "    \"\"\"Collect the distinct topic ids and parent-topic ids of a topic file.\n",
    "\n",
    "    Field 0 of each tab-separated line is added to idx; field 1 is split on\n",
    "    ',' and its non-empty items are added to pidx. The running line count\n",
    "    is printed every 100 lines and the two set sizes once at the end.\n",
    "\n",
    "    Returns:\n",
    "        (idx, pidx, title_char, title_word, desc_char, desc_word): idx and\n",
    "        pidx are sets; the four lists are returned empty because the\n",
    "        title/description fields (2-5) are not parsed.\n",
    "    \"\"\"\n",
    "    idx, pidx = set(), set()\n",
    "    title_char, title_word, desc_char, desc_word = [], [], [], []\n",
    "    # 'with' closes the file even when a line fails to parse.\n",
    "    with open(fpath) as f:\n",
    "        for cnt, line in enumerate(f, 1):\n",
    "            if cnt % 100 == 0:\n",
    "                print cnt\n",
    "            terms = line.strip().split('\\t')\n",
    "            idx.add(terms[0])\n",
    "            # A row that holds only an id has no field 1; indexing it\n",
    "            # unconditionally raised IndexError.\n",
    "            if len(terms) > 1:\n",
    "                pidx.update(ii for ii in terms[1].split(',') if ii != '')\n",
    "    print len(idx), len(pidx)\n",
    "    return idx, pidx, title_char, title_word, desc_char, desc_word\n",
    "\n",
    "def getLenInfo(arr):\n",
    "    \"\"\"Return, for each string in arr, how many non-empty items it has when split on ','.\"\"\"\n",
    "    counts = []\n",
    "    for item in arr:\n",
    "        pieces = [piece for piece in item.split(',') if piece != '']\n",
    "        counts.append(len(pieces))\n",
    "    return counts\n",
    "\n",
    "def plotHist(lenArray, save_name):\n",
    "    \"\"\"Save a 100-bin, normed=1 histogram of lenArray as './<save_name>.png'.\n",
    "\n",
    "    The x axis is fixed to [-2, 10] and the figure is titled save_name.\n",
    "    \"\"\"\n",
    "    out_file = './' + save_name + '.png'\n",
    "    fig = plt.figure()\n",
    "    ax = fig.gca()\n",
    "    ax.set_title(save_name)\n",
    "    ax.set_xlabel(\"Length\")\n",
    "    ax.set_ylabel(\"Count\")\n",
    "    ax.set_xlim(-2, 10)\n",
    "    ax.hist(lenArray, bins=100, normed=1, alpha=.5)\n",
    "    fig.savefig(out_file)\n",
    "\n",
    "def analysis(fpath):\n",
    "    \"\"\"Parse the topic file; getTopicInfo prints the id-set sizes itself.\"\"\"\n",
    "    topic_info = getTopicInfo(fpath)\n",
    "    idx, pidx = topic_info[:2]\n",
    "    title_char, title_word, desc_char, desc_word = topic_info[2:]\n",
    "    # Disabled experiments:\n",
    "    #   print np.min(np.array(getLenInfo(x))) for x in pidx, title_char,\n",
    "    #   title_word, desc_char and desc_word;\n",
    "    #   plotHist(getLenInfo(x), name) for the same five fields, with names\n",
    "    #   \"topic_parent_topic\", \"topic_title_char\", \"topic_title_word\",\n",
    "    #   \"topic_desc_char\" and \"topic_desc_word\".\n",
    "\n",
    "topic_info_file = \"../ieee_zhihu_cup/topic_info.txt\"\n",
    "analysis(topic_info_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "def getLabelInfo(fpath):\n",
    "    \"\"\"Read a tab-separated label file.\n",
    "\n",
    "    Field 0 of every line is added to the set idx. Lines with exactly two\n",
    "    fields also append field 1 to the list tidx. The running line count is\n",
    "    printed every 10000 lines.\n",
    "\n",
    "    Returns:\n",
    "        (idx, tidx)\n",
    "    \"\"\"\n",
    "    idx, tidx = set(), []\n",
    "    # 'with' closes the file even when reading fails part-way; the handle\n",
    "    # used to stay open if an exception escaped the loop.\n",
    "    with open(fpath) as f:\n",
    "        for cnt, line in enumerate(f, 1):\n",
    "            if cnt % 10000 == 0:\n",
    "                print cnt\n",
    "            terms = line.strip().split('\\t')\n",
    "            idx.add(terms[0])\n",
    "            if len(terms) == 2:\n",
    "                tidx.append(terms[1])\n",
    "    return idx, tidx\n",
    "\n",
    "def getLenInfo(arr):\n",
    "    \"\"\"Return, for each string in arr, how many non-empty items it has when split on ','.\"\"\"\n",
    "    counts = []\n",
    "    for item in arr:\n",
    "        pieces = [piece for piece in item.split(',') if piece != '']\n",
    "        counts.append(len(pieces))\n",
    "    return counts\n",
    "\n",
    "def plotHist(lenArray, save_name):\n",
    "    \"\"\"Save a 100-bin, normed=1 histogram of lenArray as './<save_name>.png'.\n",
    "\n",
    "    The x axis is fixed to [-2, 10] and the figure is titled save_name.\n",
    "    \"\"\"\n",
    "    out_file = './' + save_name + '.png'\n",
    "    fig = plt.figure()\n",
    "    ax = fig.gca()\n",
    "    ax.set_title(save_name)\n",
    "    ax.set_xlabel(\"Length\")\n",
    "    ax.set_ylabel(\"Count\")\n",
    "    ax.set_xlim(-2, 10)\n",
    "    ax.hist(lenArray, bins=100, normed=1, alpha=.5)\n",
    "    fig.savefig(out_file)\n",
    "\n",
    "def analysis(fpath):\n",
    "    \"\"\"Print the standard deviation of the per-row label counts.\n",
    "\n",
    "    A row's count is the number of non-empty comma-separated items in its\n",
    "    second field, as computed by getLenInfo.\n",
    "    \"\"\"\n",
    "    tidx = getLabelInfo(fpath)[1]\n",
    "    counts = getLenInfo(tidx)\n",
    "    print np.std(np.array(counts))\n",
    "    # plotHist(counts, \"question_topic\")\n",
    "\n",
    "label_file = \"../ieee_zhihu_cup/question_topic_train_set.txt\"\n",
    "analysis(label_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10000\n",
      "20000\n",
      "30000\n",
      "40000\n",
      "50000\n",
      "60000\n",
      "70000\n",
      "80000\n",
      "90000\n",
      "100000\n",
      "110000\n",
      "120000\n",
      "130000\n",
      "140000\n",
      "150000\n",
      "160000\n",
      "170000\n",
      "180000\n",
      "190000\n",
      "200000\n",
      "210000\n",
      "220000\n",
      "230000\n",
      "240000\n",
      "250000\n",
      "260000\n",
      "270000\n",
      "280000\n",
      "290000\n",
      "300000\n",
      "310000\n",
      "320000\n",
      "330000\n",
      "340000\n",
      "350000\n",
      "360000\n",
      "370000\n",
      "380000\n",
      "390000\n",
      "400000\n",
      "410000\n",
      "1137889\n"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "def getLabelInfo(fpath):\n",
    "    \"\"\"Scan a space-separated file and print the largest numeric token id.\n",
    "\n",
    "    The first token of every line, unless it is '</s>', is converted with\n",
    "    int(token[1:]), i.e. its first character is dropped and the rest is\n",
    "    parsed as an integer. The running line count is printed every 10000\n",
    "    lines; the maximum id is printed once at the end (nothing is printed\n",
    "    when no id was found).\n",
    "\n",
    "    Returns:\n",
    "        (idx, tidx): idx is the list of parsed ids in file order; tidx is\n",
    "        always an empty list.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a first token does not parse as described above.\n",
    "    \"\"\"\n",
    "    idx, tidx = [], []\n",
    "    # 'with' closes the file even when int() rejects a token.\n",
    "    with open(fpath) as f:\n",
    "        for cnt, line in enumerate(f, 1):\n",
    "            if cnt % 10000 == 0:\n",
    "                print cnt\n",
    "            terms = line.strip().split(' ')\n",
    "            if terms[0] != '</s>':\n",
    "                # NOTE(review): if the file starts with a numeric header line,\n",
    "                # it is parsed here like any other token - confirm the format.\n",
    "                idx.append(int(terms[0][1:]))\n",
    "    # max() is a single pass; sorting the whole list just to read its last\n",
    "    # element was unnecessary and failed with IndexError on an empty list.\n",
    "    if idx:\n",
    "        print max(idx)\n",
    "    return idx, tidx\n",
    "\n",
    "def getLenInfo(arr):\n",
    "    \"\"\"Return, for each string in arr, how many non-empty items it has when split on ','.\"\"\"\n",
    "    counts = []\n",
    "    for item in arr:\n",
    "        pieces = [piece for piece in item.split(',') if piece != '']\n",
    "        counts.append(len(pieces))\n",
    "    return counts\n",
    "\n",
    "def plotHist(lenArray, save_name):\n",
    "    \"\"\"Save a 100-bin, normed=1 histogram of lenArray as './<save_name>.png'.\n",
    "\n",
    "    The x axis is fixed to [-2, 10] and the figure is titled save_name.\n",
    "    \"\"\"\n",
    "    out_file = './' + save_name + '.png'\n",
    "    fig = plt.figure()\n",
    "    ax = fig.gca()\n",
    "    ax.set_title(save_name)\n",
    "    ax.set_xlabel(\"Length\")\n",
    "    ax.set_ylabel(\"Count\")\n",
    "    ax.set_xlim(-2, 10)\n",
    "    ax.hist(lenArray, bins=100, normed=1, alpha=.5)\n",
    "    fig.savefig(out_file)\n",
    "\n",
    "def analysis(fpath):\n",
    "    \"\"\"Scan the file with getLabelInfo, which prints the largest id found.\"\"\"\n",
    "    parsed = getLabelInfo(fpath)\n",
    "    idx, tidx = parsed\n",
    "    # Disabled: print np.std(np.array(getLenInfo(tidx)))\n",
    "    # Disabled: plotHist(getLenInfo(tidx), \"question_topic\")\n",
    "\n",
    "label_file = \"../ieee_zhihu_cup/word_embedding.txt\"\n",
    "analysis(label_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['2188640265057376235', '-6346300739520814215']\n"
     ]
    }
   ],
   "source": [
    "topic_file = \"../ieee_zhihu_cup/topic_info.txt\"\n",
    "# topic_parent maps each topic id (field 0) to the list of ids found in its\n",
    "# comma-separated field 1.\n",
    "topic_parent = {}\n",
    "# 'with' closes the handle; it was previously left open for the rest of the\n",
    "# kernel session.\n",
    "with open(topic_file) as f:\n",
    "    for line in f:\n",
    "        terms = line.strip().split('\\t')\n",
    "        tid = terms[0]\n",
    "        # A row that holds only an id has no field 1; treat it as no parents\n",
    "        # instead of failing with IndexError.\n",
    "        raw_parents = terms[1] if len(terms) > 1 else ''\n",
    "        ptid = [ii for ii in raw_parents.split(',') if ii != '']\n",
    "        topic_parent[tid] = ptid\n",
    "print topic_parent['2339809570377332086']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def getAllParents(tid):\n",
    "    \"\"\"Return the set of all ancestors of tid (parents, their parents, ...).\n",
    "\n",
    "    Direct parents are looked up in the module-level dict topic_parent.\n",
    "    tid itself is only part of the result if it is reachable from its own\n",
    "    ancestors, i.e. the data contains a cycle.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if tid or any reachable ancestor is not a key of\n",
    "        topic_parent.\n",
    "    \"\"\"\n",
    "    allParents = set()\n",
    "    pending = [tid]\n",
    "    while pending:\n",
    "        current = pending.pop()\n",
    "        for parent in topic_parent[current]:\n",
    "            # Expand every ancestor exactly once. The recursive version\n",
    "            # re-walked shared ancestors on every path and never terminated\n",
    "            # when the parent links formed a cycle.\n",
    "            if parent not in allParents:\n",
    "                allParents.add(parent)\n",
    "                pending.append(parent)\n",
    "    return allParents\n",
    "getAllParents('-873181404731804450')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10000\n",
      "20000\n",
      "30000\n",
      "40000\n",
      "50000\n",
      "60000\n",
      "70000\n",
      "80000\n",
      "90000\n",
      "100000\n",
      "110000\n",
      "120000\n",
      "130000\n",
      "140000\n",
      "150000\n",
      "160000\n",
      "170000\n",
      "180000\n",
      "190000\n",
      "200000\n",
      "210000\n",
      "220000\n",
      "230000\n",
      "240000\n",
      "250000\n",
      "260000\n",
      "270000\n",
      "280000\n",
      "290000\n",
      "300000\n",
      "310000\n",
      "320000\n",
      "330000\n",
      "340000\n",
      "350000\n",
      "360000\n",
      "370000\n",
      "380000\n",
      "390000\n",
      "400000\n",
      "410000\n",
      "420000\n",
      "430000\n",
      "440000\n",
      "450000\n",
      "460000\n",
      "470000\n",
      "480000\n",
      "490000\n",
      "500000\n",
      "510000\n",
      "520000\n",
      "530000\n",
      "540000\n",
      "550000\n",
      "560000\n",
      "570000\n",
      "580000\n",
      "590000\n",
      "600000\n",
      "610000\n",
      "620000\n",
      "630000\n",
      "640000\n",
      "650000\n",
      "660000\n",
      "670000\n",
      "680000\n",
      "690000\n",
      "700000\n",
      "710000\n",
      "720000\n",
      "730000\n",
      "740000\n",
      "750000\n",
      "760000\n",
      "770000\n",
      "780000\n",
      "790000\n",
      "800000\n",
      "810000\n",
      "820000\n",
      "830000\n",
      "840000\n",
      "850000\n",
      "860000\n",
      "870000\n",
      "880000\n",
      "890000\n",
      "900000\n",
      "910000\n",
      "920000\n",
      "930000\n",
      "940000\n",
      "950000\n",
      "960000\n",
      "970000\n",
      "980000\n",
      "990000\n",
      "1000000\n",
      "1010000\n",
      "1020000\n",
      "1030000\n",
      "1040000\n",
      "1050000\n",
      "1060000\n",
      "1070000\n",
      "1080000\n",
      "1090000\n",
      "1100000\n",
      "1110000\n",
      "1120000\n",
      "1130000\n",
      "1140000\n",
      "1150000\n",
      "1160000\n",
      "1170000\n",
      "1180000\n",
      "1190000\n",
      "1200000\n",
      "1210000\n",
      "1220000\n",
      "1230000\n",
      "1240000\n",
      "1250000\n",
      "1260000\n",
      "1270000\n",
      "1280000\n",
      "1290000\n",
      "1300000\n",
      "1310000\n",
      "1320000\n",
      "1330000\n",
      "1340000\n",
      "1350000\n",
      "1360000\n",
      "1370000\n",
      "1380000\n",
      "1390000\n",
      "1400000\n",
      "1410000\n",
      "1420000\n",
      "1430000\n",
      "1440000\n",
      "1450000\n",
      "1460000\n",
      "1470000\n",
      "1480000\n",
      "1490000\n",
      "1500000\n",
      "1510000\n",
      "1520000\n",
      "1530000\n",
      "1540000\n",
      "1550000\n",
      "1560000\n",
      "1570000\n",
      "1580000\n",
      "1590000\n",
      "1600000\n",
      "1610000\n",
      "1620000\n",
      "1630000\n",
      "1640000\n",
      "1650000\n",
      "1660000\n",
      "1670000\n",
      "1680000\n",
      "1690000\n",
      "1700000\n",
      "1710000\n",
      "1720000\n",
      "1730000\n",
      "1740000\n",
      "1750000\n",
      "1760000\n",
      "1770000\n",
      "1780000\n",
      "1790000\n",
      "1800000\n",
      "1810000\n",
      "1820000\n",
      "1830000\n",
      "1840000\n",
      "1850000\n",
      "1860000\n",
      "1870000\n",
      "1880000\n",
      "1890000\n",
      "1900000\n",
      "1910000\n",
      "1920000\n",
      "1930000\n",
      "1940000\n",
      "1950000\n",
      "1960000\n",
      "1970000\n",
      "1980000\n",
      "1990000\n",
      "2000000\n",
      "2010000\n",
      "2020000\n",
      "2030000\n",
      "2040000\n",
      "2050000\n",
      "2060000\n",
      "2070000\n",
      "2080000\n",
      "2090000\n",
      "2100000\n",
      "2110000\n",
      "2120000\n",
      "2130000\n",
      "2140000\n",
      "2150000\n",
      "2160000\n",
      "2170000\n",
      "2180000\n",
      "2190000\n",
      "2200000\n",
      "2210000\n",
      "2220000\n",
      "2230000\n",
      "2240000\n",
      "2250000\n",
      "2260000\n",
      "2270000\n",
      "2280000\n",
      "2290000\n",
      "2300000\n",
      "2310000\n",
      "2320000\n",
      "2330000\n",
      "2340000\n",
      "2350000\n",
      "2360000\n",
      "2370000\n",
      "2380000\n",
      "2390000\n",
      "2400000\n",
      "2410000\n",
      "2420000\n",
      "2430000\n",
      "2440000\n",
      "2450000\n",
      "2460000\n",
      "2470000\n",
      "2480000\n",
      "2490000\n",
      "2500000\n",
      "2510000\n",
      "2520000\n",
      "2530000\n",
      "2540000\n",
      "2550000\n",
      "2560000\n",
      "2570000\n",
      "2580000\n",
      "2590000\n",
      "2600000\n",
      "2610000\n",
      "2620000\n",
      "2630000\n",
      "2640000\n",
      "2650000\n",
      "2660000\n",
      "2670000\n",
      "2680000\n",
      "2690000\n",
      "2700000\n",
      "2710000\n",
      "2720000\n",
      "2730000\n",
      "2740000\n",
      "2750000\n",
      "2760000\n",
      "2770000\n",
      "2780000\n",
      "2790000\n",
      "2800000\n",
      "2810000\n",
      "2820000\n",
      "2830000\n",
      "2840000\n",
      "2850000\n",
      "2860000\n",
      "2870000\n",
      "2880000\n",
      "2890000\n",
      "2900000\n",
      "2910000\n",
      "2920000\n",
      "2930000\n",
      "2940000\n",
      "2950000\n",
      "2960000\n",
      "2970000\n",
      "2980000\n",
      "2990000\n",
      "9.11716721145\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "label_file = \"../ieee_zhihu_cup/question_topic_train_set.txt\"\n",
    "# percentArr gets one value per row: the share of ordered topic pairs (a, b)\n",
    "# of that row in which b is an ancestor of a, according to getAllParents.\n",
    "percentArr = []\n",
    "count = 0\n",
    "# 'with' closes the handle; it was previously never closed.\n",
    "with open(label_file) as f:\n",
    "    for line in f:\n",
    "        count += 1\n",
    "        if count % 10000 == 0:\n",
    "            print count\n",
    "        terms = line.strip().split('\\t')\n",
    "        # A row without a topic field counts as having no topics.\n",
    "        raw_tids = terms[1] if len(terms) > 1 else ''\n",
    "        tids = [ii for ii in raw_tids.split(',') if ii != '']\n",
    "        cnt = 0.0\n",
    "        for tid in tids:\n",
    "            parents = getAllParents(tid)\n",
    "            # Use a distinct name: in Python 2 a list-comprehension variable\n",
    "            # leaks into the enclosing scope and would overwrite tid.\n",
    "            cnt += len([1 for other in tids if other in parents])\n",
    "        pair_total = len(tids) * (len(tids) - 1)\n",
    "        # pair_total is 0 for single-topic rows; guard the division in case\n",
    "        # such a topic is listed among its own ancestors.\n",
    "        if cnt > 0 and pair_total > 0:\n",
    "            percentArr.append(cnt / pair_total)\n",
    "        else:\n",
    "            percentArr.append(0.0)\n",
    "print np.average(np.array(percentArr)) * 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Size of the per-class count array; label values are used directly as\n",
    "# indices into it, so every label must be below this number.\n",
    "NUM_CLASSES = 1999\n",
    "train_label_all = np.load('../data_preprocess/train_all/train_label_indices_all.npy')\n",
    "# samples_every_class[k] = number of times label index k occurs over all rows.\n",
    "samples_every_class = np.zeros(NUM_CLASSES)\n",
    "for labels in train_label_all:\n",
    "    for label in labels:\n",
    "        samples_every_class[label] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Container object of 1999 artists>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFGtJREFUeJzt3X+s3XV9x/Hn21bQoEjRriEtszg7STUZwg108Uc20FKY\ns2wzBmKkc8xmERLNtrgSk+HUP2TLZCPzx1AaW6MCUwmNymqHZGZ/FLgFBApiLwihTaGVVnBxwYHv\n/XE+l33t597bc+4953zv7Xk+kpP7PZ/v53u+7+/3nPN9fb/f8z3nRmYiSVLTS9ouQJI0/xgOkqSK\n4SBJqhgOkqSK4SBJqhgOkqRKV+EQEY9FxP0RcW9EjJe2kyNiR0TsKX+XlPaIiGsjYiIi7ouIMxuP\ns6H03xMRGxrtZ5XHnyjTRr8XVJLUvV6OHH4/M8/IzLFyfxNwW2auAm4r9wEuAFaV20bg89AJE+Aq\n4BzgbOCqyUApfT7YmG7drJdIkjRnczmttB7YUoa3ABc12rdmx07gpIg4BTgf2JGZhzLzMLADWFfG\nnZiZO7PzjbytjceSJLVgcZf9EvheRCTwr5l5HbAsM/eX8U8Cy8rwcuCJxrR7S9tM7XunaK9ExEY6\nRyOccMIJZ51++uldli9J2rVr108zc2k3fbsNh7dm5r6I+A1gR0T8qDkyM7MEx0CVULoOYGxsLMfH\nxwc9S0k6ZkTE49327eq0UmbuK38PADfT+czgqXJKiPL3QOm+Dzi1MfmK0jZT+4op2iVJLTlqOETE\nCRHxyslhYC3wALANmLziaANwSxneBlxarlpaAzxTTj9tB9ZGxJLyQfRaYHsZ92xErClXKV3aeCxJ\nUgu6Oa20DLi5XF26GPhaZv57RNwF3BQRlwGPA+8t/b8LXAhMAL8APgCQmYci4pPAXaXfJzLzUBn+\nEPBl4OXAreUmSWpJLNSf7PYzB0nqTUTsanwdYUZ+Q1qSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkV\nw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GS\nVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEcJEkVw0GSVDEc\nJEkVw0GSVDEcJEkVw0GSVOk6HCJiUUTcExHfLvdPi4g7ImIiIm6MiONK+/Hl/kQZv7LxGFeW9ocj\n4vxG+7rSNhERm/q3eJKk2ejlyOHDwEON+1cD12Tm64HDwGWl/TLgcGm/pvQjIlYDFwNvBNYBnyuB\nswj4LHABsBq4pPSVJLWkq3CIiBXAHwBfKvcDOBf4RumyBbioDK8v9ynjzyv91wM3ZOZzmfkTYAI4\nu9wmMvPRzPwlcEPpK0lqSbdHDv8EfBT4Vbn/auBnmfl8ub8XWF6GlwNPAJTxz5T+L7YfMc107ZWI\n2BgR4xExfvDgwS5LlyT16qjhEBHvAg5k5q4h1DOjzLwuM8cyc2zp0qVtlyNJx6zFXfR5C/DuiLgQ\neBlwIvDPwEkRsbgcHawA9pX++4BTgb0RsRh4FfB0o31Sc5rp2iVJLTjqkUNmXpmZKzJzJZ0PlL+f\nme8DbgfeU7ptAG4pw9vKfcr472dmlvaLy9VMpwGrgDuBu4BV5eqn48o8tvVl6SRJs9LNkcN0/ga4\nISI+BdwDXF/arwe+EhETwCE6G3syc3dE3AQ8CDwPXJ6ZLwBExBXAdmARsDkzd8+hLknSHEVnp37h\nGRsby/Hx8bbLkKQFIyJ2ZeZYN339hrQkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIq\nhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMk\nqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJ
IqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4\nSJIqRw2HiHhZRNwZET+MiN0R8Xel/bSIuCMiJiLixog4rrQfX+5PlPErG491ZWl/OCLOb7SvK20T\nEbGp/4spSepFN0cOzwHnZubvAGcA6yJiDXA1cE1mvh44DFxW+l8GHC7t15R+RMRq4GLgjcA64HMR\nsSgiFgGfBS4AVgOXlL6SpJYcNRyy47/L3ZeWWwLnAt8o7VuAi8rw+nKfMv68iIjSfkNmPpeZPwEm\ngLPLbSIzH83MXwI3lL6SpJZ09ZlD2cO/FzgA7AAeAX6Wmc+XLnuB5WV4OfAEQBn/DPDqZvsR00zX\nPlUdGyNiPCLGDx482E3pkqRZ6CocMvOFzDwDWEFnT//0gVY1fR3XZeZYZo4tXbq0jRIkaST0dLVS\nZv4MuB34XeCkiFhcRq0A9pXhfcCpAGX8q4Cnm+1HTDNduySpJd1crbQ0Ik4qwy8H3gk8RCck3lO6\nbQBuKcPbyn3K+O9nZpb2i8vVTKcBq4A7gbuAVeXqp+PofGi9rR8LJ0mancVH78IpwJZyVdFLgJsy\n89sR8SBwQ0R8CrgHuL70vx74SkRMAIfobOzJzN0RcRPwIPA8cHlmvgAQEVcA24FFwObM3N23JZQk\n9Sw6O/ULz9jYWI6Pj7ddhiQtGBGxKzPHuunrN6QlSRXDQZJUMRwkSRXDQZJUMRwkSRXDQceclZu+\n03YJ0oJnOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaD\nJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKliOEiSKoaDJKli\nOEiSKoaDJKliOEiSKoaDJKliOEiSKkcNh4g4NSJuj4gHI2J3RHy4tJ8cETsiYk/5u6S0R0RcGxET\nEXFfRJzZeKwNpf+eiNjQaD8rIu4v01wbETGIhZUkdaebI4fngb/KzNXAGuDyiFgNbAJuy8xVwG3l\nPsAFwKpy2wh8HjphAlwFnAOcDVw1GSilzwcb062b+6JJkmbrqOGQmfsz8+4y/HPgIWA5sB7YUrpt\nAS4qw+uBrdmxEzgpIk4Bzgd2ZOahzDwM7ADWlXEnZubOzExga+OxJEkt6Okzh4hYCbwZuANYlpn7\ny6gngWVleDnwRGOyvaVtpva9U7RPNf+NETEeEeMHDx7spXRJUg+6DoeIeAXwTeAjmflsc1zZ488+\n11bJzOsycywzx5YuXTro2UnSyOoqHCLipXSC4auZ+a3S/FQ5JUT5e6C07wNObUy+orTN1L5iinZJ\nUku6uVopgOuBhzLzM41R24DJK442ALc02i8tVy2tAZ4pp5+2A2sjYkn5IHotsL2MezYi1pR5Xdp4\nLElSC7o5cngL8H7g3Ii4t9wuBD4NvDMi9gDvKPcBvgs8CkwAXwQ+BJCZh4BPAneV2ydKG6XPl8o0\njwC39mHZFpyVm77TdgmSBMDio3XIzP8CpvvewXlT9E/g8mkeazOweYr2ceBNR6tFkjQcfkNaklQx\nHCRJFcNBklQxHCRJFcNBwMK6Umoh1SotVIaDJKliOEiSKobDkHgqRNJCYjhIkiqGg1rhkZQ0vxkO\nkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgO0pD50yFaCAwHSVLF\ncJAkVQwHSVLFcJAkVQyHWfJDRUnHMsNBklQxHDQrHjlJxzbDQZJUMRwkSRXDYR7ylI2kthkOkqSK\n4SD1wKM6jQrDYY7cWEg6Fh01HCJic0QciIgHGm0nR8SOiNhT/i4p7RER10bERETcFxFnNqbZUPrv\niYgNjfazIuL+Ms21ERH9XkhJUm+6OXL4MrDuiLZNwG2ZuQq4rdwHuABYVW4bgc9DJ0yAq4BzgLOB\nqy
YDpfT5YGO6I+clSRqyo4ZDZv4AOHRE83pgSxneAlzUaN+aHTuBkyLiFOB8YEdmHsrMw8AOYF0Z\nd2Jm7szMBLY2HkuS1JLZfuawLDP3l+EngWVleDnwRKPf3tI2U/veKdqnFBEbI2I8IsYPHjw4y9IH\ny88gJB0L5vyBdNnjzz7U0s28rsvMscwcW7p06TBmKUkjabbh8FQ5JUT5e6C07wNObfRbUdpmal8x\nRbum4ZGJpGGYbThsAyavONoA3NJov7RctbQGeKacftoOrI2IJeWD6LXA9jLu2YhYU65SurTxWEPl\nRndh8HmShmPx0TpExNeB3wNeExF76Vx19Gngpoi4DHgceG/p/l3gQmAC+AXwAYDMPBQRnwTuKv0+\nkZmTH3J/iM4VUS8Hbi03SVKLjhoOmXnJNKPOm6JvApdP8zibgc1TtI8DbzpaHZKk4fEb0pKkiuEw\nA89vSxpVhoMkqWI4zDMerUiaDwwHSVLFcNCLPGqRNMlwkCRVDAdJUsVw6MJCON2yEGqUtHAYDmrN\nfAy0+ViT1AbDQZqHDCm1zXCQJFUMB8077jVL7TMcJEkVw2Ea7r3OjetPWtgMB0lSxXCYBxbSXvYg\nah3m8i+kdS21yXBomMuGozntMDZAk/OYal69zN+NpaSpGA4jYqYwOZYc68un+eVYfr0ZDtIsHcsb\nBmlkw2GYe9KjshEZleWURsHIhsNCcOTGdrqNrxvlhW9UnsNRWc6mhbrMhsM8sdCvApqthVCjRsOo\nfC7XLcNBC9LKTd/xTdwHnlYdjH4uc1vrz3A4wqCfiFF6o4zSsk4a1DKP4rrslgE3GIZDj/r9HYJj\n4UU30zIslDfubB9j2N9vmYv59GXD2X5+5hHj8Ix0OLT1ZbGF8uJeKHVOGsSGuh+h0S/z6fkYdi1t\nLHu385xPz0s/jXQ4jIJ+bdyOhTdAW1d7zWW+c61tNtPP5UjwWAuNmd4Hg3w9zYcrEw2HWRjmaYy2\nN8pz/XmOuU4zyD3wYb4BB/l8z/Y5mm0ITDe/+fJ5Xb9Pc7Z12rLt977hMERz2Ysf5F7mXPYGe51n\nPzdIvcyvH3VO91iD3tOean7T1TDVnm034dFN335976abGns9RdhN4E8339kEWzfPyXT9jmxvOwSm\nM5LhMMgPy+ajXvf05rqH1o+w6dcRSy+63bD0Mm23jz/V+G7mOcg90rku92yWZ6q/3c63l8CbjX7u\nCE2OP1pQ9nsZejGS4SBJmpnhIEmqGA6SpIrhIEmqzJtwiIh1EfFwRExExKa265GkUTYvwiEiFgGf\nBS4AVgOXRMTqdquSpNE1L8IBOBuYyMxHM/OXwA3A+pZrkqSRFZnZdg1ExHuAdZn55+X++4FzMvOK\nI/ptBDaWu28AHp7lLF8D/HSW0w6SdfXGunpjXb05Fut6bWYu7abj4lnOoBWZeR1w3VwfJyLGM3Os\nDyX1lXX1xrp6Y129GfW65stppX3AqY37K0qbJKkF8yUc7gJWRcRpEXEccDGwreWaJGlkzYvTSpn5\nfERcAWwHFgGbM3P3AGc551NTA2JdvbGu3lhXb0a6rnnxgbQkaX6ZL6eVJEnziOEgSaqMVDi0+RMd\nEXFqRNweEQ9GxO6I+HBp/3hE7IuIe8vtwsY0V5ZaH46I8wdY22MRcX+Z/3hpOzkidkTEnvJ3SWmP\niLi21HVfRJw5oJre0Fgn90bEsxHxkbbWV0RsjogDEfFAo63ndRQRG0r/PRGxYUB1/UNE/KjM++aI\nOKm0r4yI/2msuy80pjmrvAYmSu0xgLp6fu76/Z6dpq4bGzU9FhH3lvZhrq/ptg/tvcYycyRudD7o\nfgR4HXAc8ENg9RDnfwpwZhl+JfBjOj8V8nHgr6fov7rUeDxwWql90YBqewx4zRFtfw9sKsObgKvL\n8IXArUAAa4A7hvTcPQm8tq31BbwdOBN4YLbrCDgZeLT8XVKGlwyg
rrXA4jJ8daOulc1+RzzOnaXW\nKLVfMIC6enruBvGenaquI8b/I/C3Layv6bYPrb3GRunIodWf6MjM/Zl5dxn+OfAQsHyGSdYDN2Tm\nc5n5E2CCzjIMy3pgSxneAlzUaN+aHTuBkyLilAHXch7wSGY+PkOfga6vzPwBcGiKefayjs4HdmTm\nocw8DOwA1vW7rsz8XmY+X+7upPO9oWmV2k7MzJ3Z2cJsbSxL3+qawXTPXd/fszPVVfb+3wt8fabH\nGND6mm770NprbJTCYTnwROP+XmbeOA9MRKwE3gzcUZquKIeGmycPGxluvQl8LyJ2RecnSgCWZeb+\nMvwksKyFuiZdzK+/YdteX5N6XUdt1PhndPYwJ50WEfdExH9GxNtK2/JSyzDq6uW5G/b6ehvwVGbu\nabQNfX0dsX1o7TU2SuEwL0TEK4BvAh/JzGeBzwO/BZwB7KdzWDtsb83MM+n8Ku7lEfH25siyd9TK\nNc/R+VLku4F/K03zYX1V2lxH04mIjwHPA18tTfuB38zMNwN/CXwtIk4cYknz8rlruIRf3wkZ+vqa\nYvvwomG/xkYpHFr/iY6IeCmdJ/6rmfktgMx8KjNfyMxfAV/k/0+FDK3ezNxX/h4Abi41PDV5uqj8\nPTDsuooLgLsz86lSY+vrq6HXdTS0GiPiT4F3Ae8rGxXKaZuny/AuOufzf7vU0Dz1NJC6ZvHcDXN9\nLQb+GLixUe9Q19dU2wdafI2NUji0+hMd5Xzm9cBDmfmZRnvzfP0fAZNXUWwDLo6I4yPiNGAVnQ/B\n+l3XCRHxyslhOh9mPlDmP3mlwwbglkZdl5arJdYAzzQOewfh1/bm2l5fR+h1HW0H1kbEknJKZW1p\n66uIWAd8FHh3Zv6i0b40Ov87hYh4HZ119Gip7dmIWFNep5c2lqWfdfX63A3zPfsO4EeZ+eLpomGu\nr+m2D7T5GpvLJ+wL7UbnE/4f09kD+NiQ5/1WOoeE9wH3ltuFwFeA+0v7NuCUxjQfK7U+zByvhpih\nrtfRuQrkh8DuyfUCvBq4DdgD/AdwcmkPOv+Y6ZFS99gA19kJwNPAqxptrawvOgG1H/hfOudxL5vN\nOqLzGcBEuX1gQHVN0DnvPPk6+0Lp+yflOb4XuBv4w8bjjNHZWD8C/Avl1xP6XFfPz12/37NT1VXa\nvwz8xRF9h7m+pts+tPYa8+czJEmVUTqtJEnqkuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkyv8B\nQ07IlEEui6AAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fea663f2cd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "# Bar chart of the per-class counts held in `samples_every_class`.\n",
    "# The y-axis is capped at 50000, so any bar taller than that is cut off at the top.\n",
    "plt.ylim(0, 50000)\n",
    "# Label the axes so the figure can be read on its own.\n",
    "# NOTE(review): labels assume index i of `samples_every_class` is class i's sample count - confirm.\n",
    "plt.xlabel('class index')\n",
    "plt.ylabel('number of samples')\n",
    "# plt.bar stays the last expression so the cell still returns the bar container.\n",
    "plt.bar(range(1999), samples_every_class)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Count, for each of the 1999 class indices, how many times it occurs in the training labels.\n",
    "train_label = np.load('../data_preprocess/train/train_label_indices.npy')\n",
    "samples_every_class = np.zeros(1999)\n",
    "# Iterate the array loaded above. The previous version looped over `train_label_all`,\n",
    "# which left `train_label` unused and made this cell depend on a variable from another cell.\n",
    "# NOTE(review): assumes each entry of `train_label` is an iterable of integer class\n",
    "# indices in [0, 1999) - confirm against the preprocessing step.\n",
    "for labels in train_label:\n",
    "    for label in labels:\n",
    "        samples_every_class[label] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Container object of 1999 artists>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFiJJREFUeJzt3X+s3fV93/HnqzhkKA2xCXcWsklNVi+VW6kJXIGrptEW\nNmNIF7MtQqCq9hiLNQWmRNvUOYs0uqR/kE1rVrSUihUPO0pLWNoIq0Adj0Sr9ocJ14HwM9QXAsKW\nwW5MoBtTMtL3/jifmx78vT/O/Xnu9X0+pKPzPe/v53u+7/M9535f53zP99ipKiRJ6vdTw25AkrT8\nGA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdawZdgNzdeGFF9amTZuG3YYkrRhH\njhz5i6oaGWTsig2HTZs2MTY2Nuw2JGnFSPLioGM9rCRJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLU\nYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2G\ngySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6hgoHJKsTfLVJN9N8kySX0pyQZJDSY6263Vt\nbJLcnmQ8yeNJLu27n11t/NEku/rqlyV5oi1ze5Is/EOVJA1q0E8OvwP8aVX9HPCLwDPAHuChqtoM\nPNRuA1wNbG6X3cAdAEkuAG4FrgAuB26dCJQ25uN9y22f38OSJM3HjOGQ5F3Ah4C7AKrqR1X1A2AH\nsK8N2wdc26Z3APur5zCwNslFwFXAoao6XVWvAoeA7W3e+VV1uKoK2N93X5KkIRjkk8MlwCngvyV5\nNMnvJ3kHsL6qTrQxLwPr2/QG4KW+5Y+12nT1Y5PUO5LsTjKWZOzUqVMDtC5JmotBwmENcClwR1V9\nAPg//PUhJADaO/5a+PbeqqrurKrRqhodGRlZ7NVJ0qo1SDgcA45V1cPt9lfphcUr7ZAQ7fpkm38c\nuLhv+Y2tNl194yR1SdKQzBgOVfUy8FKS97XSlcDTwAFg4oyjXcB9bfoAsLOdtbQVeK0dfjoIbEuy\nrn0RvQ042Oa9nmRrO0tpZ999SZKGYM2A4/4F8OUk5wLPAzfSC5Z7k9wEvAhc18Y+AFwDjANvtLFU\n1ekknwMeaeM+W1Wn2/QngLuB84AH20WSNCTpfV2w8oyOjtbY2Niw25CkFSPJkaoaHWSsv5CWJHUY\nDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+Eg\nSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqGCgckryQ5IkkjyUZa7ULkhxK\ncrRdr2v1JLk9yXiSx5Nc2nc/u9r4o0l29dUva/c/3pbNQj9QSdLgZvPJ4e9W1furarTd3gM8VFWb\ngYfabYCrgc3tshu4A3phAtwKXAFcDtw6EShtzMf7lts+50ckSZq3+RxW2gHsa9P7gGv76vur5zCw\nNslFwFXAoao6XVWvAoeA7W3e+VV1uKoK2N93X5KkIRg0HAr4epIjSXa32vqqOtGmXwbWt+kNwEt9\nyx5rtenqxyapS5KGZM2A4z5YVceT/E3gUJLv9s+sqkpSC9/eW7Vg2g3wnve8Z7FXJ0mr1kCfHKrq\neLs+CXyN3ncGr7RDQrTrk234ceDivsU3ttp09Y2T1Cfr486qGq2q0ZGRkUFalyTNwYzhkOQdSd45\nMQ1sA54EDgATZxztAu5r0weAne2spa3Aa+3w00FgW5J17YvobcDBNu/1JFvbWUo7++5LkjQEgxxW\nWg98rZ1dugb4g6r60ySPAPcmuQl4EbiujX8AuAYYB94AbgSoqtNJPgc80sZ9tqpOt+lPAHcD5wEP\ntoskaUjSO0Fo5RkdHa2xsbFhtyFJK0
aSI30/R5iWv5CWJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ\n6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQO\nw0GS1GE4SJI6DAdJUofhIEnqGDgckpyT5NEkf9JuX5Lk4STjSb6S5NxWf3u7Pd7mb+q7j0+3+rNJ\nruqrb2+18SR7Fu7hSZLmYjafHD4JPNN3+/PAF6rqZ4FXgZta/Sbg1Vb/QhtHki3A9cDPA9uB322B\ncw7wReBqYAtwQxsrSRqSgcIhyUbgI8Dvt9sBPgx8tQ3ZB1zbpne027T5V7bxO4B7quqHVfU9YBy4\nvF3Gq+r5qvoRcE8bK0kakkE/Ofxn4DeAv2q33w38oKrebLePARva9AbgJYA2/7U2/if1M5aZqi5J\nGpIZwyHJrwInq+rIEvQzUy+7k4wlGTt16tSw25Gks9Ygnxx+GfhokhfoHfL5MPA7wNoka9qYjcDx\nNn0cuBigzX8X8P3++hnLTFXvqKo7q2q0qkZHRkYGaF2SNBczhkNVfbqqNlbVJnpfKH+jqn4N+Cbw\nsTZsF3Bfmz7QbtPmf6OqqtWvb2czXQJsBr4FPAJsbmc/ndvWcWBBHp0kaU7WzDxkSv8GuCfJbwGP\nAne1+l3Al5KMA6fp7eypqqeS3As8DbwJ3FxVPwZIcgtwEDgH2FtVT82jL0nSPKX3pn7lGR0drbGx\nsWG3IUkrRpIjVTU6yFh/IS1J6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeow\nHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNB\nktQxYzgk+RtJvpXkO0meSvLvW/2SJA8nGU/ylSTntvrb2+3xNn9T3319utWfTXJVX317q40n2bPw\nD1OSNBuDfHL4IfDhqvpF4P3A9iRbgc8DX6iqnwVeBW5q428CXm31L7RxJNkCXA/8PLAd+N0k5yQ5\nB/gicDWwBbihjZUkDcmM4VA9/7vdfFu7FPBh4Kutvg+4tk3vaLdp869Mkla/p6p+WFXfA8aBy9tl\nvKqer6ofAfe0sZKkIRnoO4f2Dv8x4CRwCHgO+EFVvdmGHAM2tOkNwEsAbf5rwLv762csM1VdkjQk\nA4VDVf24qt4PbKT3Tv/nFrWrKSTZnWQsydipU6eG0YIkrQqzOlupqn4AfBP4JWBtkjVt1kbgeJs+\nDlwM0Oa/C/h+f/2MZaaqT7b+O6tqtKpGR0ZGZtO6JGkWBjlbaSTJ2jZ9HvD3gWfohcTH2rBdwH1t\n+kC7TZv/jaqqVr++nc10CbAZ+BbwCLC5nf10Lr0vrQ8sxIOTJM3NmpmHcBGwr51V9FPAvVX1J0me\nBu5J8lvAo8BdbfxdwJeSjAOn6e3sqaqnktwLPA28CdxcVT8GSHILcBA4B9hbVU8t2COUJM1aem/q\nV57R0dEaGxsbdhuStGIkOVJVo4OM9RfSkqQOw0GS1GE4SJI6DAeddTbtuX/YLUgrnuEgSeowHCRJ\nHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRh\nOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqWPGcEhycZJvJnk6yVNJPtnqFyQ5lORou17X6klye5LxJI8n\nubTvvna18UeT7OqrX5bkibbM7UmyGA9WkjSYQT45vAn8q6raAmwFbk6yBdgDPFRVm4GH2m2Aq4HN\n7bIbuAN6YQLcClwBXA7cOhEobczH+5bbPv+HJkmaqxnDoapOVNW32/RfAs8AG4AdwL42bB9wbZve\nAeyvnsPA2iQXAVcBh6rqdFW9ChwCtrd551fV4aoqYH/ffUmShmBW3zkk2QR8AHgYWF9VJ9qsl4H1\nbX
oD8FLfYsdabbr6sUnqk61/d5KxJGOnTp2aTeuSpFkYOByS/DTwR8Cnqur1/nntHX8tcG8dVXVn\nVY1W1ejIyMhir06SVq2BwiHJ2+gFw5er6o9b+ZV2SIh2fbLVjwMX9y2+sdWmq2+cpC5JGpJBzlYK\ncBfwTFX9dt+sA8DEGUe7gPv66jvbWUtbgdfa4aeDwLYk69oX0duAg23e60m2tnXt7LuvVWXTnvuH\n3YIkAbBmgDG/DPw68ESSx1rt3wK3AfcmuQl4EbiuzXsAuAYYB94AbgSoqtNJPgc80sZ9tqpOt+lP\nAHcD5wEPtoskaUhmDIeq+l/AVL87uHKS8QXcPMV97QX2TlIfA35hpl4kSUvDX0hLkjoMB0lSh+Eg\nYGV9Gb6SepVWKsNhibhDk7SSGA6SpA7DQUPhJylpeTMcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThI\nkjoMB0lSh+EgLTF/46GVwHCQJHUYDpKkDsNBktRhOMyRx40lnc0MB82J4Sid3QwHSVKH4bAM+a5c\n0rAZDtIsGNxaLWYMhyR7k5xM8mRf7YIkh5IcbdfrWj1Jbk8ynuTxJJf2LbOrjT+aZFdf/bIkT7Rl\nbk+ShX6Qi8mdhaSz0SCfHO4Gtp9R2wM8VFWbgYfabYCrgc3tshu4A3phAtwKXAFcDtw6EShtzMf7\nljtzXZKkJTZjOFTVnwGnzyjvAPa16X3AtX31/dVzGFib5CLgKuBQVZ2uqleBQ8D2Nu/8qjpcVQXs\n77svSdKQzPU7h/VVdaJNvwysb9MbgJf6xh1rtenqxyapr1geZpJ0Npj3F9LtHX8tQC8zSrI7yViS\nsVOnTi3FKiVpVZprOLzSDgnRrk+2+nHg4r5xG1ttuvrGSeqTqqo7q2q0qkZHRkbm2PrkVso7/pXS\np6SVba7hcACYOONoF3BfX31nO2tpK/BaO/x0ENiWZF37InobcLDNez3J1naW0s6++5I6DEdpaayZ\naUCSPwT+DnBhkmP0zjq6Dbg3yU3Ai8B1bfgDwDXAOPAGcCNAVZ1O8jngkTbus1U18SX3J+idEXUe\n8GC7SJKGaMZwqKobpph15SRjC7h5ivvZC+ydpD4G/MJMfQzDpj3388JtHxl2G5K05PyFtCSpw3BY\nZjymLmk5MBz0EwaTpAmGgySpw3AYwEp4R70SepS0chgOGprlGGjLsSdpGAwHaRkypDRshoOWHXeM\n0vAZDpKkDsNhCr57nR+3n7SyGQ7LwErakS5Gr0v5+FfStpaGyXDoM58dR/+yS7EDmljHZOuarnbm\nvLNtZ3m2PR5pWAwHaY4MIp3Nr4FVGw7TvfNerHWd7VbL49Tc+RpZOVZtOCw3gxwemuoPa7Z1LT8+\nV8O3WG8YV+pzazhoqOb6h7Npz/0r9o9uOfGT8+JYyMc8rO1nOJxhsZ+Is/ELYT+59CxmYK20bbmU\n28GAWxyGw4Dm8pFzpe80F6LPYZ25Ndv1zvW5WsrneL73uZzezc7nEOlS/f0Mup6V8vc8W6s6HM7W\nJ7XffA7bLMT9LCfDCuv5rHe+Y+by2Ga6v4Ve33wM85P+bJ7XpXqzspBWdTjM1ULscBfjXclCBcFM\n8xb7cfQvs1BfEk7W83R/gAv1W5FB1jXTfc0mIPqvF3onP9OOctDnajFfp4sRXvN57S32G4PFtGao\nax+SxTpUsFyPN892h7OYZ0kNOm+hji1v2nM/L9z2kYHHzqY+lzGT9TLb0JnLu9D+9U61bV+47SNT\nPgdTbcPptu9CrXe65c6szXTfg8ybynSvz4nbgz7eM+dP9DLVtpjN63ih+MlBktSxbMIhyfYkzyYZ\nT7Jn2P1I0mq2LMIhyTnAF4GrgS3ADUm2DLcrSVq9lkU4AJcD41X1fFX9CLgH2DHkniRp1Vou4bAB\neKnv9rFWkyQNQapq2D2Q5GPA9qr6Z+32rwNXVNUtZ4zbDexuN98H
PDvHVV4I/MUcl11M9jU79jU7\n9jU7Z2NfP1NVI4MMXC6nsh4HLu67vbHV3qKq7gTunO/KkoxV1eh872eh2dfs2Nfs2NfsrPa+lsth\npUeAzUkuSXIucD1wYMg9SdKqtSw+OVTVm0luAQ4C5wB7q+qpIbclSavWsggHgKp6AHhgiVY370NT\ni8S+Zse+Zse+ZmdV97UsvpCWJC0vy+U7B0nSMrKqwmGY/0RHkouTfDPJ00meSvLJVv/NJMeTPNYu\n1/Qt8+nW67NJrlrE3l5I8kRb/1irXZDkUJKj7XpdqyfJ7a2vx5Ncukg9va9vmzyW5PUknxrW9kqy\nN8nJJE/21Wa9jZLsauOPJtm1SH39xyTfbev+WpK1rb4pyf/t23a/17fMZe01MN56zyL0NevnbqH/\nZqfo6yt9Pb2Q5LFWX8rtNdX+YXivsapaFRd6X3Q/B7wXOBf4DrBlCdd/EXBpm34n8Of0/qmQ3wT+\n9STjt7Qe3w5c0no/Z5F6ewG48IzafwD2tOk9wOfb9DXAg0CArcDDS/TcvQz8zLC2F/Ah4FLgyblu\nI+AC4Pl2va5Nr1uEvrYBa9r05/v62tQ/7oz7+VbrNa33qxehr1k9d4vxNztZX2fM/0/AvxvC9ppq\n/zC019hq+uQw1H+io6pOVNW32/RfAs8w/a/AdwD3VNUPq+p7wDi9x7BUdgD72vQ+4Nq++v7qOQys\nTXLRIvdyJfBcVb04zZhF3V5V9WfA6UnWOZttdBVwqKpOV9WrwCFg+0L3VVVfr6o3283D9H43NKXW\n2/lVdbh6e5j9fY9lwfqaxlTP3YL/zU7XV3v3fx3wh9PdxyJtr6n2D0N7ja2mcFg2/0RHkk3AB4CH\nW+mW9tFw78THRpa23wK+nuRIer9CB1hfVSfa9MvA+iH0NeF63voHO+ztNWG222gYPf5Teu8wJ1yS\n5NEk/zPJr7TahtbLUvQ1m+duqbfXrwCvVNXRvtqSb68z9g9De42tpnBYFpL8NPBHwKeq6nXgDuBv\nAe8HTtD7WLvUPlhVl9L7V3FvTvKh/pnt3dFQTmtL70eRHwX+eysth+3VMcxtNJUknwHeBL7cSieA\n91TVB4B/CfxBkvOXsKVl+dz1uYG3vglZ8u01yf7hJ5b6NbaawmGgf6JjMSV5G70n/stV9ccAVfVK\nVf24qv4K+K/89aGQJeu3qo6365PA11oPr0wcLmrXJ5e6r+Zq4NtV9Urrcejbq89st9GS9ZjknwC/\nCvxa26nQDtt8v00foXc8/2+3HvoPPS1KX3N47pZye60B/hHwlb5+l3R7TbZ/YIivsdUUDkP9Jzra\n8cy7gGeq6rf76v3H6/8hMHEWxQHg+iRvT3IJsJnel2AL3dc7krxzYprel5lPtvVPnOmwC7ivr6+d\n7WyJrcBrfR97F8Nb3s0Ne3udYbbb6CCwLcm6dkhlW6stqCTbgd8APlpVb/TVR9L7v1NI8l562+j5\n1tvrSba21+nOvseykH3N9rlbyr/Zvwd8t6p+crhoKbfXVPsHhvkam8837CvtQu8b/j+n9w7gM0u8\n7g/S+0j4OPBYu1wDfAl4otUPABf1LfOZ1uuzzPNsiGn6ei+9s0C+Azw1sV2AdwMPAUeB/wFc0Oqh\n9x8zPdf6Hl3EbfYO4PvAu/pqQ9le9ALqBPD/6B3HvWku24jedwDj7XLjIvU1Tu+488Tr7Pfa2H/c\nnuPHgG8D/6Dvfkbp7ayfA/4L7QeyC9zXrJ+7hf6bnayvVr8b+OdnjF3K7TXV/mForzF/IS1J6lhN\nh5UkSQMyHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUsf/BxVcf6AmcmGcAAAAAElFTkSu\nQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f7ad73f9d90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the per-class counts held in `samples_every_class`.\n",
    "# Label the axes so the figure can be read on its own.\n",
    "plt.xlabel('class index')\n",
    "plt.ylabel('number of samples')\n",
    "# plt.bar stays the last expression so the cell still returns the bar container.\n",
    "plt.bar(range(1999), samples_every_class)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Count, for each of the 1999 class indices, how many times it occurs in the validation labels.\n",
    "val_label = np.load('../data_preprocess/val/val_label_indices.npy')\n",
    "# Rebinding `samples_every_class` discards any counts computed by an earlier cell.\n",
    "samples_every_class = np.zeros(1999)\n",
    "# Iterate the validation labels loaded above. The previous version looped over\n",
    "# `train_label_all`, so the resulting histogram was not built from the validation set.\n",
    "# NOTE(review): assumes each entry of `val_label` is an iterable of integer class\n",
    "# indices in [0, 1999) - confirm against the preprocessing step.\n",
    "for labels in val_label:\n",
    "    for label in labels:\n",
    "        samples_every_class[label] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Container object of 1999 artists>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFiJJREFUeJzt3X+s3fV93/HnqzhkKA2xCXcWsklNVi+VW6kJXIGrptEW\nNmNIF7MtQqCq9hiLNQWmRNvUOYs0uqR/kE1rVrSUihUPO0pLWNoIq0Adj0Sr9ocJ14HwM9QXAsKW\nwW5MoBtTMtL3/jifmx78vT/O/Xnu9X0+pKPzPe/v53u+7/M9535f53zP99ipKiRJ6vdTw25AkrT8\nGA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdawZdgNzdeGFF9amTZuG3YYkrRhH\njhz5i6oaGWTsig2HTZs2MTY2Nuw2JGnFSPLioGM9rCRJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLU\nYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2G\ngySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6hgoHJKsTfLVJN9N8kySX0pyQZJDSY6263Vt\nbJLcnmQ8yeNJLu27n11t/NEku/rqlyV5oi1ze5Is/EOVJA1q0E8OvwP8aVX9HPCLwDPAHuChqtoM\nPNRuA1wNbG6X3cAdAEkuAG4FrgAuB26dCJQ25uN9y22f38OSJM3HjOGQ5F3Ah4C7AKrqR1X1A2AH\nsK8N2wdc26Z3APur5zCwNslFwFXAoao6XVWvAoeA7W3e+VV1uKoK2N93X5KkIRjkk8MlwCngvyV5\nNMnvJ3kHsL6qTrQxLwPr2/QG4KW+5Y+12nT1Y5PUO5LsTjKWZOzUqVMDtC5JmotBwmENcClwR1V9\nAPg//PUhJADaO/5a+PbeqqrurKrRqhodGRlZ7NVJ0qo1SDgcA45V1cPt9lfphcUr7ZAQ7fpkm38c\nuLhv+Y2tNl194yR1SdKQzBgOVfUy8FKS97XSlcDTwAFg4oyjXcB9bfoAsLOdtbQVeK0dfjoIbEuy\nrn0RvQ042Oa9nmRrO0tpZ999SZKGYM2A4/4F8OUk5wLPAzfSC5Z7k9wEvAhc18Y+AFwDjANvtLFU\n1ekknwMeaeM+W1Wn2/QngLuB84AH20WSNCTpfV2w8oyOjtbY2Niw25CkFSPJkaoaHWSsv5CWJHUY\nDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+Eg\nSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqGCgckryQ5IkkjyUZa7ULkhxK\ncrRdr2v1JLk9yXiSx5Nc2nc/u9r4o0l29dUva/c/3pbNQj9QSdLgZvPJ4e9W1furarTd3gM8VFWb\ngYfabYCrgc3tshu4A3phAtwKXAFcDtw6EShtzMf7lts+50ckSZq3+RxW2gHsa9P7gGv76vur5zCw\nNslFwFXAoao6XVWvAoeA7W3e+VV1uKoK2N93X5KkIRg0HAr4epIjSXa32vqqOtGmXwbWt+kNwEt9\nyx5rtenqxyapS5KGZM2A4z5YVceT/E3gUJLv9s+sqkpSC9/eW7Vg2g3wnve8Z7FXJ0mr1kCfHKrq\neLs+CXyN3ncGr7RDQrTrk234ceDivsU3ttp09Y2T1Cfr486qGq2q0ZGRkUFalyTNwYzhkOQdSd45\nMQ1sA54EDgATZxztAu5r0weAne2spa3Aa+3w00FgW5J17YvobcDBNu/1JFvbWUo7++5LkjQEgxxW\nWg98rZ1dugb4g6r60ySPAPcmuQl4EbiujX8AuAYYB94AbgSoqtNJPgc80sZ9tqpOt+lPAHcD5wEP\ntoskaUjSO0Fo5RkdHa2xsbFhtyFJK0
aSI30/R5iWv5CWJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ\n6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQO\nw0GS1GE4SJI6DAdJUofhIEnqGDgckpyT5NEkf9JuX5Lk4STjSb6S5NxWf3u7Pd7mb+q7j0+3+rNJ\nruqrb2+18SR7Fu7hSZLmYjafHD4JPNN3+/PAF6rqZ4FXgZta/Sbg1Vb/QhtHki3A9cDPA9uB322B\ncw7wReBqYAtwQxsrSRqSgcIhyUbgI8Dvt9sBPgx8tQ3ZB1zbpne027T5V7bxO4B7quqHVfU9YBy4\nvF3Gq+r5qvoRcE8bK0kakkE/Ofxn4DeAv2q33w38oKrebLePARva9AbgJYA2/7U2/if1M5aZqi5J\nGpIZwyHJrwInq+rIEvQzUy+7k4wlGTt16tSw25Gks9Ygnxx+GfhokhfoHfL5MPA7wNoka9qYjcDx\nNn0cuBigzX8X8P3++hnLTFXvqKo7q2q0qkZHRkYGaF2SNBczhkNVfbqqNlbVJnpfKH+jqn4N+Cbw\nsTZsF3Bfmz7QbtPmf6OqqtWvb2czXQJsBr4FPAJsbmc/ndvWcWBBHp0kaU7WzDxkSv8GuCfJbwGP\nAne1+l3Al5KMA6fp7eypqqeS3As8DbwJ3FxVPwZIcgtwEDgH2FtVT82jL0nSPKX3pn7lGR0drbGx\nsWG3IUkrRpIjVTU6yFh/IS1J6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeow\nHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNB\nktQxYzgk+RtJvpXkO0meSvLvW/2SJA8nGU/ylSTntvrb2+3xNn9T3319utWfTXJVX317q40n2bPw\nD1OSNBuDfHL4IfDhqvpF4P3A9iRbgc8DX6iqnwVeBW5q428CXm31L7RxJNkCXA/8PLAd+N0k5yQ5\nB/gicDWwBbihjZUkDcmM4VA9/7vdfFu7FPBh4Kutvg+4tk3vaLdp869Mkla/p6p+WFXfA8aBy9tl\nvKqer6ofAfe0sZKkIRnoO4f2Dv8x4CRwCHgO+EFVvdmGHAM2tOkNwEsAbf5rwLv762csM1VdkjQk\nA4VDVf24qt4PbKT3Tv/nFrWrKSTZnWQsydipU6eG0YIkrQqzOlupqn4AfBP4JWBtkjVt1kbgeJs+\nDlwM0Oa/C/h+f/2MZaaqT7b+O6tqtKpGR0ZGZtO6JGkWBjlbaSTJ2jZ9HvD3gWfohcTH2rBdwH1t\n+kC7TZv/jaqqVr++nc10CbAZ+BbwCLC5nf10Lr0vrQ8sxIOTJM3NmpmHcBGwr51V9FPAvVX1J0me\nBu5J8lvAo8BdbfxdwJeSjAOn6e3sqaqnktwLPA28CdxcVT8GSHILcBA4B9hbVU8t2COUJM1aem/q\nV57R0dEaGxsbdhuStGIkOVJVo4OM9RfSkqQOw0GS1GE4SJI6DAeddTbtuX/YLUgrnuEgSeowHCRJ\nHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRh\nOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqWPGcEhycZJvJnk6yVNJPtnqFyQ5lORou17X6klye5LxJI8n\nubTvvna18UeT7OqrX5bkibbM7UmyGA9WkjSYQT45vAn8q6raAmwFbk6yBdgDPFRVm4GH2m2Aq4HN\n7bIbuAN6YQLcClwBXA7cOhEobczH+5bbPv+HJkmaqxnDoapOVNW32/RfAs8AG4AdwL42bB9wbZve\nAeyvnsPA2iQXAVcBh6rqdFW9ChwCtrd551fV4aoqYH/ffUmShmBW3zkk2QR8AHgYWF9VJ9qsl4H1\nbX
oD8FLfYsdabbr6sUnqk61/d5KxJGOnTp2aTeuSpFkYOByS/DTwR8Cnqur1/nntHX8tcG8dVXVn\nVY1W1ejIyMhir06SVq2BwiHJ2+gFw5er6o9b+ZV2SIh2fbLVjwMX9y2+sdWmq2+cpC5JGpJBzlYK\ncBfwTFX9dt+sA8DEGUe7gPv66jvbWUtbgdfa4aeDwLYk69oX0duAg23e60m2tnXt7LuvVWXTnvuH\n3YIkAbBmgDG/DPw68ESSx1rt3wK3AfcmuQl4EbiuzXsAuAYYB94AbgSoqtNJPgc80sZ9tqpOt+lP\nAHcD5wEPtoskaUhmDIeq+l/AVL87uHKS8QXcPMV97QX2TlIfA35hpl4kSUvDX0hLkjoMB0lSh+Eg\nYGV9Gb6SepVWKsNhibhDk7SSGA6SpA7DQUPhJylpeTMcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThI\nkjoMB0lSh+EgLTF/46GVwHCQJHUYDpKkDsNBktRhOMyRx40lnc0MB82J4Sid3QwHSVKH4bAM+a5c\n0rAZDtIsGNxaLWYMhyR7k5xM8mRf7YIkh5IcbdfrWj1Jbk8ynuTxJJf2LbOrjT+aZFdf/bIkT7Rl\nbk+ShX6Qi8mdhaSz0SCfHO4Gtp9R2wM8VFWbgYfabYCrgc3tshu4A3phAtwKXAFcDtw6EShtzMf7\nljtzXZKkJTZjOFTVnwGnzyjvAPa16X3AtX31/dVzGFib5CLgKuBQVZ2uqleBQ8D2Nu/8qjpcVQXs\n77svSdKQzPU7h/VVdaJNvwysb9MbgJf6xh1rtenqxyapr1geZpJ0Npj3F9LtHX8tQC8zSrI7yViS\nsVOnTi3FKiVpVZprOLzSDgnRrk+2+nHg4r5xG1ttuvrGSeqTqqo7q2q0qkZHRkbm2PrkVso7/pXS\np6SVba7hcACYOONoF3BfX31nO2tpK/BaO/x0ENiWZF37InobcLDNez3J1naW0s6++5I6DEdpaayZ\naUCSPwT+DnBhkmP0zjq6Dbg3yU3Ai8B1bfgDwDXAOPAGcCNAVZ1O8jngkTbus1U18SX3J+idEXUe\n8GC7SJKGaMZwqKobpph15SRjC7h5ivvZC+ydpD4G/MJMfQzDpj3388JtHxl2G5K05PyFtCSpw3BY\nZjymLmk5MBz0EwaTpAmGgySpw3AYwEp4R70SepS0chgOGprlGGjLsSdpGAwHaRkypDRshoOWHXeM\n0vAZDpKkDsNhCr57nR+3n7SyGQ7LwErakS5Gr0v5+FfStpaGyXDoM58dR/+yS7EDmljHZOuarnbm\nvLNtZ3m2PR5pWAwHaY4MIp3Nr4FVGw7TvfNerHWd7VbL49Tc+RpZOVZtOCw3gxwemuoPa7Z1LT8+\nV8O3WG8YV+pzazhoqOb6h7Npz/0r9o9uOfGT8+JYyMc8rO1nOJxhsZ+Is/ELYT+59CxmYK20bbmU\n28GAWxyGw4Dm8pFzpe80F6LPYZ25Ndv1zvW5WsrneL73uZzezc7nEOlS/f0Mup6V8vc8W6s6HM7W\nJ7XffA7bLMT9LCfDCuv5rHe+Y+by2Ga6v4Ve33wM85P+bJ7XpXqzspBWdTjM1ULscBfjXclCBcFM\n8xb7cfQvs1BfEk7W83R/gAv1W5FB1jXTfc0mIPqvF3onP9OOctDnajFfp4sRXvN57S32G4PFtGao\nax+SxTpUsFyPN892h7OYZ0kNOm+hji1v2nM/L9z2kYHHzqY+lzGT9TLb0JnLu9D+9U61bV+47SNT\nPgdTbcPptu9CrXe65c6szXTfg8ybynSvz4nbgz7eM+dP9DLVtpjN63ih+MlBktSxbMIhyfYkzyYZ\nT7Jn2P1I0mq2LMIhyTnAF4GrgS3ADUm2DLcrSVq9lkU4AJcD41X1fFX9CLgH2DHkniRp1Vou4bAB\neKnv9rFWkyQNQapq2D2Q5GPA9qr6Z+32rwNXVNUtZ4zbDexuN98H
PDvHVV4I/MUcl11M9jU79jU7\n9jU7Z2NfP1NVI4MMXC6nsh4HLu67vbHV3qKq7gTunO/KkoxV1eh872eh2dfs2Nfs2NfsrPa+lsth\npUeAzUkuSXIucD1wYMg9SdKqtSw+OVTVm0luAQ4C5wB7q+qpIbclSavWsggHgKp6AHhgiVY370NT\ni8S+Zse+Zse+ZmdV97UsvpCWJC0vy+U7B0nSMrKqwmGY/0RHkouTfDPJ00meSvLJVv/NJMeTPNYu\n1/Qt8+nW67NJrlrE3l5I8kRb/1irXZDkUJKj7XpdqyfJ7a2vx5Ncukg9va9vmzyW5PUknxrW9kqy\nN8nJJE/21Wa9jZLsauOPJtm1SH39xyTfbev+WpK1rb4pyf/t23a/17fMZe01MN56zyL0NevnbqH/\nZqfo6yt9Pb2Q5LFWX8rtNdX+YXivsapaFRd6X3Q/B7wXOBf4DrBlCdd/EXBpm34n8Of0/qmQ3wT+\n9STjt7Qe3w5c0no/Z5F6ewG48IzafwD2tOk9wOfb9DXAg0CArcDDS/TcvQz8zLC2F/Ah4FLgyblu\nI+AC4Pl2va5Nr1uEvrYBa9r05/v62tQ/7oz7+VbrNa33qxehr1k9d4vxNztZX2fM/0/AvxvC9ppq\n/zC019hq+uQw1H+io6pOVNW32/RfAs8w/a/AdwD3VNUPq+p7wDi9x7BUdgD72vQ+4Nq++v7qOQys\nTXLRIvdyJfBcVb04zZhF3V5V9WfA6UnWOZttdBVwqKpOV9WrwCFg+0L3VVVfr6o3283D9H43NKXW\n2/lVdbh6e5j9fY9lwfqaxlTP3YL/zU7XV3v3fx3wh9PdxyJtr6n2D0N7ja2mcFg2/0RHkk3AB4CH\nW+mW9tFw78THRpa23wK+nuRIer9CB1hfVSfa9MvA+iH0NeF63voHO+ztNWG222gYPf5Teu8wJ1yS\n5NEk/zPJr7TahtbLUvQ1m+duqbfXrwCvVNXRvtqSb68z9g9De42tpnBYFpL8NPBHwKeq6nXgDuBv\nAe8HTtD7WLvUPlhVl9L7V3FvTvKh/pnt3dFQTmtL70eRHwX+eysth+3VMcxtNJUknwHeBL7cSieA\n91TVB4B/CfxBkvOXsKVl+dz1uYG3vglZ8u01yf7hJ5b6NbaawmGgf6JjMSV5G70n/stV9ccAVfVK\nVf24qv4K+K/89aGQJeu3qo6365PA11oPr0wcLmrXJ5e6r+Zq4NtV9Urrcejbq89st9GS9ZjknwC/\nCvxa26nQDtt8v00foXc8/2+3HvoPPS1KX3N47pZye60B/hHwlb5+l3R7TbZ/YIivsdUUDkP9Jzra\n8cy7gGeq6rf76v3H6/8hMHEWxQHg+iRvT3IJsJnel2AL3dc7krxzYprel5lPtvVPnOmwC7ivr6+d\n7WyJrcBrfR97F8Nb3s0Ne3udYbbb6CCwLcm6dkhlW6stqCTbgd8APlpVb/TVR9L7v1NI8l562+j5\n1tvrSba21+nOvseykH3N9rlbyr/Zvwd8t6p+crhoKbfXVPsHhvkam8837CvtQu8b/j+n9w7gM0u8\n7g/S+0j4OPBYu1wDfAl4otUPABf1LfOZ1uuzzPNsiGn6ei+9s0C+Azw1sV2AdwMPAUeB/wFc0Oqh\n9x8zPdf6Hl3EbfYO4PvAu/pqQ9le9ALqBPD/6B3HvWku24jedwDj7XLjIvU1Tu+488Tr7Pfa2H/c\nnuPHgG8D/6Dvfkbp7ayfA/4L7QeyC9zXrJ+7hf6bnayvVr8b+OdnjF3K7TXV/mForzF/IS1J6lhN\nh5UkSQMyHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUsf/BxVcf6AmcmGcAAAAAElFTkSu\nQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f7ad7628490>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the per-class counts held in `samples_every_class`.\n",
    "# Label the axes so the figure can be read on its own.\n",
    "plt.xlabel('class index')\n",
    "plt.ylabel('number of samples')\n",
    "# plt.bar stays the last expression so the cell still returns the bar container.\n",
    "plt.bar(range(1999), samples_every_class)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
